import re
from collections import defaultdict

from wikitextprocessor import Page, Wtp


def analyze_template(wtp: Wtp, page: Page) -> tuple[set[str], bool]:
    """Analyze a template body for called templates and pre-expansion need.

    Returns a tuple ``(included_templates, pre_expand)``:

    - ``included_templates`` is the set of template names called from the
      part of the body that lies outside paired table start/end tokens.
      Names are returned as written in the body, with ``<nowiki />`` tags
      removed; no further normalization is done here.
    - ``pre_expand`` is True if the template should be pre-expanded before
      final parsing: the body starts with a list item character, has an
      unequal number of table start and end tokens, contains table element
      tokens outside templates, or contains unbalanced paired HTML tags.

    The flag is determined from this body only; the caller should
    propagate it to templates that include the given template.  Calls
    whose name is generated by other expansions are not detected.

    A redirect page or a page without a body yields ``(set(), False)``.
    """
    if page.redirect_to is not None or page.body is None:
        return set(), False
    body = page.body
    included_templates: set[str] = set()

    # Determine if the template starts with a list item
    # XXX should we expand other templates that produce list items???
    contains_list = body.startswith(("#", "*", ";", ":"))

    # Remove paired tables.
    # What is left is unpaired tables, which is an indication that a
    # template somewhere should be generating those table eventually,
    # and thus needs to be pre-expanded.
    table_start_pos: list[int] = []
    table_end_pos: list[int] = []
    # `[[wikt:/|}]]` in Template:Mon standard keyboard
    # and `{{l|mul|} }}` in Template:punctuation are not end of table token
    # but `|}]]` in Template:Lithuania map is a table
    for m in re.finditer(
        r"""
        (?<!{){\|  # `{|` not after `{`, like `{{{|}}}`
        |
        \|}(?!\s*})  # `|}` not before ` }`
        """,
        body,
        re.VERBOSE,
    ):
        if m.group() == "{|":
            table_start_pos.append(m.start())
        else:
            table_end_pos.append(m.end())
    num_table_start = len(table_start_pos)
    num_table_end = len(table_end_pos)
    contains_unpaired_table = num_table_start != num_table_end

    # Pick the span [table_start, table_end) to cut out of the body.  By
    # default the span is empty (both ends at len(body)), so nothing is cut.
    table_start = len(body)
    table_end = table_start
    if num_table_start > num_table_end and num_table_end > 0:
        # More starts than ends: cut from the start token at index
        # (surplus - 1) through the last end token.
        table_start = table_start_pos[num_table_start - num_table_end - 1]
        table_end = table_end_pos[-1]
    elif num_table_start < num_table_end and num_table_start > 0:
        # More ends than starts: cut from the first start token through
        # the end token at index num_table_start.
        table_start = table_start_pos[0]
        table_end = table_end_pos[num_table_start]
    elif num_table_start > 0 and num_table_end > 0:
        # Equal, non-zero counts: cut from the first start to the last end.
        table_start = table_start_pos[0]
        table_end = table_end_pos[-1]
    unpaired_text = body[:table_start] + body[table_end:]

    # Determine if the template contains table element tokens
    # outside paired table start/end.  We only try to look for
    # these outside templates, as it is common to write each
    # template argument on its own line starting with a "|".
    # Innermost constructs are stripped repeatedly until nothing changes,
    # which peels nested templates/arguments from the inside out.
    outside = unpaired_text
    while True:
        prev = outside

        # handle {{{ }}} parameters without templates inside them
        while True:
            newt = re.sub(
                # re.X, ignore white space and comments
                r"""(?sx)\{\{\{                # {{{
                               (    [^{}]      # no {} except...
                               |    \}[^}]     # no }} unless...
                               |    \}\}[^}]   # they're definitely not }}}
                               )*?
                         \}\}\}                # }}}
                """,
                "",
                prev,
            )
            if newt == prev:
                break
            prev = newt

        # Handle templates
        newt = re.sub(
            r"""(?sx)\{\{
                                    (    [^{}]
                                    |    \}[^}]
                                    )*?
                                \}\}""",
            "",
            newt,
        )
        if newt == outside:
            break
        outside = newt

    # Check if the template contains certain table elements:
    # a line starting with `|+` (caption), `|-` (row) or `!` (header cell)
    table_token = re.search(r"(?s)(^|\n)(\|\+|\|-|\!)", outside)
    # ... or text that begins (after optional whitespace) with
    # <includeonly> or an HTML comment directly followed by `||` or `!!`
    inline_cell = re.match(
        r"(?si)\s*(<includeonly>|<!--.*?-->)(\|\||!!)", outside
    )
    contains_table_element = table_token is not None or inline_cell is not None

    # Check for unpaired HTML tags.  Each opening tag counts +1 and each
    # closing tag -1; self-closing tags (`<tag ... />`) are ignored.
    tag_cnts: defaultdict[str, int] = defaultdict(int)
    for m in re.finditer(
        # The attribute part must be non-greedy: a greedy `[^>]*` would
        # swallow the trailing `/` and the self-closing group could never
        # match, making every `<tag/>` look like an unclosed opening tag.
        r"(?si)<(/)?({})\b\s*[^>]*?(/)?>".format(
            "|".join(wtp.paired_html_tags)
        ),
        outside,
    ):
        start_slash = m.group(1)
        # The pattern is case-insensitive, so normalize the name to make
        # `<DIV>` and `</div>` count against the same tag.
        tagname = m.group(2).lower()
        end_slash = m.group(3)
        if start_slash:
            tag_cnts[tagname] -= 1
        elif not end_slash:
            tag_cnts[tagname] += 1
    contains_unbalanced_html = any(v != 0 for v in tag_cnts.values())

    # Determine which other templates are called from unpaired text.
    # None of the flags we currently gather propagate outside a paired
    # table start/end.
    for m in re.finditer(
        # capture the first parameter of a template, ie. the name
        r"""(?sx)(^   |  [^{])            # start
                    (\{\{)?\{\{([^{]*?)   # ( ({{) {{ (name) )
                 (\|  |  \}\})            # | or }}""",
        unpaired_text,
    ):
        called_template = re.sub(r"(?si)<nowiki\s*/>", "", m.group(3))
        if called_template:
            included_templates.add(called_template)

    # Determine whether this template should be pre-expanded
    pre_expand = (
        contains_list
        or contains_unpaired_table
        or contains_table_element
        or contains_unbalanced_html
    )

    return included_templates, pre_expand
